/*
Inputs: raw csv files of precinct-level and district-level electoral outcomes for 8 elections:
			> ltw_2013_stimmbezirke_zweitstimmen.csv 	[state 2013]
			> btw_2013_wahlbezirke_zweitstimmen.csv  	[federal 2013]
			> euw_2014_wahlbezirke.csv 					[european 2014]
			> btw_2017_zweitstimmen_wahlbezirke.csv 	[federal 2017]
			> euw_2019_amtlich_ergebnisdownload.csv 	[european 2019]
			> srw_endergebnisse_stimmbezirke.csv 		[municipal 2014]
			> srw_2020_ergebnisdownload.xlsx			[municipal 2020]
			
Outputs:
			> tmp/raw_stimmbez_all_elections 	  [Precinct-level electoral outcomes]
			> tmp/stadtbez_postalcomp_partyoutc   [District-level electoral outcomes]

Tasks:
A) Read the raw election files containing election outcomes at the precinct level 
	("Stimmbezirk"-level) and append into one single file
	
Note: Compute % shares of party results as:
		municipal elections (city council): #votes/ total #valid votes 
		all other elections: #voter/ (#voters - #invalid_votes)
		
B) Compute district-level ("Stadtbezirk"-level) outcomes (overall turnout, share mail-in, party outcomes)


				
*/

********************************************************************************
// A: Read raw election files an Append into single file (polling place voters only) //
********************************************************************************

****** STEP 1: define programm applicable to each file ******

* i) Clean Voter info (applicable to all elections)
cap program drop clean_elections
program  clean_elections
	
	missings dropobs, force
	
	cap rename wahlbezirksart stimmbezirksart // EuE-19
	
* gen Stimmbezirks-ID and clean ID (strip of 3. digit "Zero")
	tostring gebietsnummer, gen(stimmbezirk)
	drop 	 gebietsnummer
	
	replace 	stimmbezirk = "0"+ stimmbezirk if strlen(stimmbezirk) == 4
	replace 	stimmbezirk = substr(stimmbezirk,1,2) +substr(stimmbezirk,4,.)
	destring	stimmbezirk, replace
	lab var		stimmbezirk "Stimmbezirk ID (string)"
	
* rename Variables
	rename wahlberechtigtegesamt				wahlber_gesamt
	rename wahlberechtigteohnewahlschein		wahlber_ohne_wahlschein
	rename wahlberechtigtemitwahlschein			wahlber_mit_wahlschein
	rename wahlberechtigtenichtimwählerv*		wahlber_nicht_im_waehlerv
	rename wählergesamt							waehler_gesamt
	rename wählermitwahlschein					waehler_mit_wahlschein
	rename ungültigestimmen						ungueltige_stimmen         
	
	cap rename *ü* *ue*
	cap rename *ä* *ae*

* GEN Variable
	gen 	waehler_ohne_wahlschein = 			waehler_gesamt - waehler_mit_wahlschein
	
 * Lab variables in english	
	lab var waehler_ohne_wahlschein 			"Voters w/o polling card"
	lab var wahlber_gesamt						"Nbr of eligible voters"
	lab var wahlber_ohne_wahlschein				"Eligible voters w/o polling card"
	lab var wahlber_mit_wahlschein				"Eligible voters with polling card"
	lab var wahlber_nicht_im_waehlerv			"Eligible voters NOT in electoral roll"
	lab var waehler_gesamt						"Nbr of voters"
	lab var waehler_mit_wahlschein				"Eligible voters with polling card"
	lab var ungueltige_stimmen        			"Invalid votes"
	

end

* ii) Clean Outcomes 
cap program drop clean_outcomes1
program  clean_outcomes1
	
	
	// SE-13 FE-13 EuE-14 ME-14
	cap confirm variable partei
	if !_rc {
		rename partei 			v1
		rename anzahlstimmen 	v2
		rename prozentzahl 		v3

	
		rename v* v#, addnumber

		forvalues k= 1(3)80 {
			cap confirm variable v`k'
			if _rc {
				continue
			}
			local l = `k' +1
			local m = `k' +2
			
			sort v`k'
			assert v`k'[1] == v`k'[_N]
			local name = v`k'[1]
			local partei = subinstr("`name'"," / FW FREIE WÄHLER München e.V.", "",.)
			local partei = subinstr("`partei'","Ü", "UE",.)
			local partei = subinstr("`partei'","Ö", "OE",.)
			local partei = subinstr("`partei'","Ä", "AE",.)
			local partei = subinstr("`partei'","/", "_",.)
			local partei = lower(subinstr("`partei'",".", "",.))
			local partei = lower(subinstr("`partei'"," ", "",.))
			rename v`l' anz_`partei'
			lab var anz_`partei' "Votes for `name'"
			
		}
	}
	cap drop v* // drop Shares/Party name
	
	// FE-17 EuE19 SE-18 ME-20
	cap rename anzahl* anz_=
	cap rename *anzahl* **
	cap drop anteil*
	cap rename *fwmuenchen* **
	
	// remove Umlaute for all elections
	cap rename *ö* *oe*
	cap rename *ü* *ue*
	cap rename *ä* *ae*
	
end
			


****** STEP 2: Read individual election files (precinct-level variables) ******

tempfile ergebnis_ltw2013 ergebnis_btw2013 ergebnis_kow2014 ergebnis_euw2014 ///
			ergebnis_btw2017 ergebnis_ltw2018 ergebnis_euw2019 ergebnis_kow2020


* 1) SE-13
	import delim "$rawdata/election_office/ltw_2013/ltw_2013_stimmbezirke_zweitstimmen.csv", encoding("utf-8") delim(";") varnames(1) clear
	// run program
	clean_elections
	clean_outcomes1
	// gen Election ID
	gen wahl = "LTW13"
	// save
	save `ergebnis_ltw2013'

* 2) FE-13
	import delim "$rawdata/election_office/btw_2013/btw_2013_wahlbezirke_zweitstimmen.csv", encoding("utf-8") delim(";")  varnames(1) clear
	// run program
	clean_elections
	clean_outcomes1
	// gen Election ID
	gen wahl = "BTW13"
	// save
	save `ergebnis_btw2013'
	

* 3) EuE-14
	import delim "$rawdata/election_office/euw_2014/euw_2014_wahlbezirke.csv", encoding("utf-8") delim(";")  varnames(1) clear
	// run program
	clean_elections
	clean_outcomes1
	// gen Election ID
	gen wahl = "EUW14"
	// save
	save `ergebnis_euw2014'
	

* 4) FE-17
	import delim "$rawdata/election_office/btw_2017/btw_2017_zweitstimmen_wahlbezirke.csv", encoding("utf-8") delim(";")  varnames(1) clear
	// run program
	clean_elections
	clean_outcomes1

	// gen Election ID
	gen wahl = "BTW17"
	// save
	save `ergebnis_btw2017'
	
* 5) EuE-19
	import excel "$rawdata/election_office/euw_2019/euw_2019_amtlich_ergebnisdownload.xlsx", clear ///
			  sheet("WahlbezirkeStimmen") case(lower) firstrow
	// run program
	clean_elections
	clean_outcomes1
	// gen Election ID
	gen wahl = "EUW19"
	// save
	save `ergebnis_euw2019'
	
* 6) SE-18
	import excel "$rawdata/election_office/ltw_2018/ltw_2018_amtlich_ergebnisdownload.xlsx", clear ///
			  sheet("StimmbezirkeZweitstimmen") case(lower) firstrow
	// run program
	clean_elections
	clean_outcomes1
	// gen Election ID
	gen wahl = "LTW18"
	// save
	save `ergebnis_ltw2018'
	
* 7) ME-14 (city council "Stadtratswahlen")
	import delim "$rawdata/election_office/kommw_2014/srw_endergebnisse_stimmbezirke.csv", encoding("utf-8") delim(";")  varnames(1) clear
	// run program
	clean_elections
	clean_outcomes1
	// NOTE: SRW include about 80 votes per person => % voting shares obtained by first computing total #votes
	egen kow_stimmenges = rowtotal(anz_*)
	lab var kow_stimmenges "Ges. gültige Stimmen STADTRATSWAHLEN (nur KOW)"
	// gen Election ID
	gen wahl = "KOW14"
	// save
	save `ergebnis_kow2014'
	
* 8) ME-20 (city council "Stadtratswahlen")
	import excel "$rawdata/election_office/kommw_2020/srw_2020_ergebnisdownload.xlsx", clear ///
			  sheet("StimmbezirkeStimmen") case(lower) firstrow
	// run program
	clean_elections
	clean_outcomes1
	// NOTE: SRW include about 80 votes per person => % voting shares obtained by first computing total #votes
	egen kow_stimmenges = rowtotal(anz_*)
	lab var kow_stimmenges "Ges. gültige Stimmen STADTRATSWAHLEN (nur KOW)"
	// gen Election ID
	gen wahl = "KOW20"
	// save
	save `ergebnis_kow2020'
clear

	
****** STEP 3: Append files & finalize & save ******
	* append
	foreach f in "btw2013" "btw2017" "euw2014" "euw2019" "kow2020" "kow2014" "ltw2013" "ltw2018" {
		append using `ergebnis_`f''
	}
	order anz_*, last
	lab var wahl 	"Election ID (string)"
	
	* gen Stadtbezirk
	gen stadtbezirk = floor(stimmbezirk/100)
	lab var stadtbezirk "District ID (Stadtbezirk)"
	
	* drop precinct = 9901 (1 obs in EuE-19) => some votes not assignable to precinct
	drop if stimmbezirk == 9901
		drop if stimmbezirk == 16 // DROP Munich whole city

	
	
 *** SAVE: in-person + mail-in votes at DISTRICT level
	*	Note: except for EuE-19 and ME-20, mail voters (Stimmbez-Art = 21) already included here
	preserve
		collapse (sum) wahlber* waehl* ung* anz_* kow_stimmenges, by(wahl stadtbezirk stimmbezirksart)
		save "$tmp/stadtbezirk_outcomes.dta", replace
	restore
	
	// KEEP in-person votes + polling card requests
	keep if stimmbezirksart==11 	
	
 ***SAVE: PRECINCT-level outcomes
	save "$tmp/raw_stimmbez_all_elections.dta", replace
	
	
	

********************************************************************************
// B: 	Prepare district-level electoral outcomes
********************************************************************************

*** STEP 1: Read district-level ("Stadtbezirk") outcomes for ME-20 and EuE-19 ***
	* Note: Other elections files already include mail-in voters (Gebietsart = 21)
	
* ME-20
	import excel "$rawdata/election_office/kommw_2020/srw_2020_ergebnisdownload.xlsx", clear ///
		  sheet("StadtbezirkeStimmen") case(lower) firstrow

	missings dropvars, force
	
	gen stadtbezirk = gebietsnummer
	
	// clean
	clean_elections
	clean_outcomes1
	drop stimmbezirk 
	
	// NOTE: SRW (city council) include about 80 votes per person => % voting shares obtained by first computing total #votes
	egen kow_stimmenges = rowtotal(anz_*)
	lab var kow_stimmenges "Ges. gültige Stimmen STADTRATSWAHLEN (nur ME)"
	
	* keep mail voters
	keep if stimmbezirksart == 21

	drop if stadtbezirk == 162

	* gen Election ID
	gen wahl = "KOW20"
	
	* save
	tempfile kow2020_stadtbez
	save `kow2020_stadtbez'
	
	
* EUW 2019
	import excel "$rawdata/election_office/euw_2019/euw_2019_amtlich_ergebnisdownload.xlsx", clear ///
			  sheet("StadtbezirkeStimmen") case(lower) firstrow
		
	missings dropobs, force
	missings dropvars, force
	
	
	gen stadtbezirk = gebietsnummer
	
	// clean
	clean_elections
	clean_outcomes1
	drop stimmbezirk 

	* keep mail voters
	keep if stimmbezirksart == 21
		
	drop if stadtbezirk == 99
	drop if stadtbezirk == 162
	
	* gen Election ID
	gen wahl = "EUW19"
	
	* save
	tempfile euw2019_stadtbez
	save `euw2019_stadtbez'
	
	
/* STEP 2: Append and compute Variables */

	
	* Pull: poll and mail-in votes at district level
	use "$tmp/stadtbezirk_outcomes.dta", clear
	
	* append KOW20 and EUW19
	append using `euw2019_stadtbez'
	append using `kow2020_stadtbez'	
	sort wahl stadtbezirk stimmbezirksart
	lab var wahl 	"Election ID (string)"

	
	* Gen auxilliary district-level variables
	// mail-in votes
	bys wahl stadtbezirk (stimmbezirksart): gen waehler_21 = waehler_gesamt[_N]						
	// eligible voters with polling card
	bys wahl stadtbezirk (stimmbezirksart): gen wahlber_mit_11 = wahlber_mit_wahlschein[1]			
	// poll voters with polling card
	bys wahl stadtbezirk (stimmbezirksart): gen waehler_mit_11 = waehler_mit_wahlschein[1]			
	// poll voters total
	bys wahl stadtbezirk (stimmbezirksart): gen waehler_ges_11 = waehler_gesamt[1]					
	// poll voters w/o polling card
	bys wahl stadtbezirk (stimmbezirksart): gen waehler_ohne_11 = waehler_ges_11 - waehler_mit_11	
	// eligble voters total
	bys wahl stadtbezirk (stimmbezirksart): assert wahlber_gesamt[1] != 0
	bys wahl stadtbezirk (stimmbezirksart): replace wahlber_gesamt = wahlber_gesamt[1] if wahlber_gesamt == 0 
	

	* Gen: district-level turnout
	// gen poll turnout (w/ and w/o polling card)
	gen sbez_turnout_urne_tot = waehler_ges_11/wahlber_gesamt
	// gen poll turnout (w/o polling card)
	gen sbez_turnout_urne_ohne = waehler_ohne_11/wahlber_gesamt
	// gen poll turnout (w/ polling card)
	gen sbez_turnout_urne_mit = waehler_mit_11/wahlber_gesamt
	
	// gen TURNOUT POSTAL
	gen sbez_turnout_pos = waehler_21/wahlber_gesamt
	
	// gen TURNOUT REQUESTED
	gen sbez_turnout_pos_req = wahlber_mit_11/wahlber_gesamt
	
	// gen TURNOUT TOTAL
	gen sbez_turnout_tot = sbez_turnout_pos + sbez_turnout_urne_tot
	
	cap drop tmp*
	
	* Lab variables 
	lab var sbez_turnout_urne_tot 	"Turnout at the polling place (w and w/o PC) (district)"
	lab var sbez_turnout_urne_mit 	"Turnout at the polling place (w PC) (district)"
	lab var sbez_turnout_urne_ohne 	"Turnout at the polling place (w/o PC) (district)"
	lab var sbez_turnout_pos  		"Turnout via mail only (district)"
	lab var sbez_turnout_tot		"Overall turnout (district)"
	lab var sbez_turnout_pos_req	"Turnout requested polling card (district)"
	
	lab var waehler_ges_11	"Votes at the polling place (w and w/o PC) (district)"
	lab var waehler_ohne_11	"Votes at the polling place (w/o PC) (district)"
	lab var waehler_21		"Votes via mail (district)"
	lab var wahlber_mit_11	"Votes requested polling card (district)"

	** GEN TOTAL Party Outcomes (Shares): sbez_shr_`partei'
	// denominator for ME and other elections
	bys wahl stadtbezirk: egen tmp_kow_stimmenges = total(kow_stimmenges) 	// ME: total valid votes
	bys wahl stadtbezirk: egen tmp_totwaehler = total(waehler_gesamt) 		// others: total votes
	bys wahl stadtbezirk: egen tmp_ung = total(ungueltige_stimmen) 			// others: total invalid votes
	gen tmp_tot_stimmenges = tmp_totwaehler - tmp_ung						// others: total valid votes
	
	foreach v of varlist anz_* {
		local partei = subinstr("`v'","anz_","",.)
		bys wahl stadtbezirk: egen help = total(`v')		
		gen 	sbez_shr_`partei' = help/tmp_tot_stimmenges if !inlist(wahl, "KOW20", "KOW14")
		replace sbez_shr_`partei' = help/tmp_kow_stimmenges if inlist(wahl, "KOW20", "KOW14")
		replace sbez_shr_`partei' = 0 if missing(sbez_shr_`partei')
		lab var sbez_shr_`partei' "Overall party share of `partei' (district)"
		drop help

		bys 	wahl stadtbezirk: egen sbez_anz_t`partei' = total(`v')
		lab var sbez_anz_t`partei' "Votes (total) for `partei' (district)"
		gen 	sbez_anz_u`partei'=`v' if stimmbezirksart==11
		lab var sbez_anz_u`partei' "Votes (polling place) for `partei' (district)"
		gen 	sbez_anz_p`partei'=`v' if stimmbezirksart==21
		lab var sbez_anz_p`partei' "Votes (mail-in) for `partei' (district)"
	}
	drop tmp*

	// check if shares make sense
	egen tmp = rowtotal(sbez_shr_*)
	assert inrange(tmp, .99,1.001)
	drop tmp
	
	
	// copy labels
	foreach v of varlist * {
		local `v'l : variable label `v'
	}
		
	collapse sbez_shr_* sbez_turnout* sbez_anz_* waehler_ges_11 waehler_ohne_11  waehler_21 wahlber_mit_11, by(stadtbezirk wahl)
	
	// re-label						
	foreach v of varlist * {
		lab var `v' "``v'l'"
	}
	
	rename stadtbezirk stadtbez
	lab var stadtbez "District ID (Stadtbezirk)"

	
	* save
	save "$tmp/stadtbez_postalcomp_partyoutc.dta", replace

